package com.galeno.sparksql02

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
 * @Title: Hive分区表操作
 * @Description: Demo of writing DataFrames into a Hive table partitioned by date (dt).
 * @author galeno
 * @date 2021/9/6 19:40
 */
object Hive分区表操作 {

  /**
   * Demo: write DataFrames into a Hive table partitioned by the `dt` column.
   *
   * Builds a local SparkSession with Hive support, creates a small event
   * DataFrame, and appends it to the Hive table `event_detail` using
   * `partitionBy("dt")` — which behaves like Hive dynamic partitioning
   * (the partition value is taken from each row's `dt` column).
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local")
      .appName("Hello")
      .enableHiveSupport()
      .getOrCreate()

    // Ensure the SparkSession is always stopped, even if the job fails;
    // otherwise the local Spark context (and its UI/ports) leaks.
    try {
      import spark.implicits._

      val rdd = spark.sparkContext.makeRDD(Seq(
        (1, "zs", "addcart", "2021-09-05"),
        (1, "zs", "collect", "2021-09-05"),
        (1, "zs", "pageview", "2021-09-05"),
        (2, "bb", "addcart", "2021-09-05"),
        (2, "bb", "addcart", "2021-09-05"),
        (2, "bb", "addcart", "2021-09-06"),
        (3, "cc", "adshow", "2021-09-06"),
        (3, "cc", "addcart", "2021-09-06"),
        (3, "cc", "adclick", "2021-09-06"),
        (2, "bb", "addcart", "2021-09-06"),
      ))
      val df: DataFrame = rdd.toDF("id", "name", "event", "dt")
      // Write to Hive as a table partitioned by dt. In Hive SQL terms:
      //   insert into table event_detail partition(dt='2021-09-05') select id,name,event from df    -- static partition
      //   insert into table event_detail partition(dt) select id,name,event,dt from df              -- dynamic partition
      // DataFrameWriter.partitionBy("dt") corresponds to the dynamic-partition form.
      // df.write.partitionBy("dt").saveAsTable("event_detail")
      // df.write.mode(SaveMode.Append).partitionBy("dt").saveAsTable("event_detail")

      val rdd2 = spark.sparkContext.makeRDD(Seq(
        (1, "zs", "addcart", "2021-09-07"),
        (1, "zs", "collect", "2021-09-07"),
        (2, "bb", "addcart", "2021-09-07"),
        (2, "bb", "addcart", "2021-09-07"),
        (2, "bb", "addcart", "2021-09-07"),
        (3, "cc", "addcart", "2021-09-07"),
        (3, "cc", "adclick", "2021-09-07"),
        (2, "bb", "addcart", "2021-09-07"),
      ))

      val df2 = rdd2.toDF("id", "name", "event", "dt")
      // Append mode adds the new dt=2021-09-07 partition without touching
      // existing partitions of event_detail.
      df2.write.mode(SaveMode.Append).partitionBy("dt").saveAsTable("event_detail")
    } finally {
      spark.stop()
    }
  }

}
